import os
from pylab import *

# Directory that is scanned for input files named
# "enhancers.<dataset>.<total>.bed".
directory = "enhancer_predictions_randomized"

# Per-dataset list of [total, line count] points. Each series is seeded with
# the origin so that the plotted curves start at (0, 0).
data = {"HiSeq": [[0, 0]],
        "CAGE": [[0, 0]],
       }

# Collect [dataset, total, filename] for every file in the directory.
# The checks raise explicitly (rather than using assert, which is stripped
# under "python -O") so that an unexpected file is always reported by name.
filenames = []
for filename in os.listdir(directory):
    terms = filename.split(".")
    if len(terms) < 4 or terms[0] != "enhancers" or terms[3] != "bed":
        raise ValueError(
            "unexpected file name {!r} in {!r}; expected "
            "'enhancers.<dataset>.<total>.bed'".format(filename, directory)
        )
    dataset = terms[1]
    if dataset not in data:
        # Fail here with a clear message instead of a bare KeyError later,
        # when the count would be appended to data[dataset].
        raise ValueError(
            "unknown dataset {!r} in file name {!r}; expected one of {}".format(
                dataset, filename, sorted(data)
            )
        )
    # The numeric field is later plotted (in millions) as the number of
    # sequenced reads; int() raises ValueError if it is not an integer.
    total = int(terms[2])
    filenames.append([dataset, total, filename])

# Process the files in sorted (dataset, total, filename) order.
filenames.sort()
for dataset, total, filename in filenames:
    path = os.path.join(directory, filename)
    print("Reading", path)
    # Every line of the file counts as one entry. The context manager
    # guarantees the file is closed even if reading raises part-way through.
    with open(path) as stream:
        n = sum(1 for line in stream)
    data[dataset].append([total, n])

# Order each series by total and convert it to a 2-D array so that the
# totals (column 0) and the counts (column 1) can be sliced for plotting.
for dataset in data:
    data[dataset].sort()
    data[dataset] = array(data[dataset])

# Draw one curve per dataset: column 0 (totals) scaled to millions on the
# x axis against column 1 (line counts) on the y axis.
for series_key, series_color, series_label in (
    ('HiSeq', 'red', 'Short capped RNAs (single-end libraries)'),
    ('CAGE', 'blue', 'Long capped RNAs (CAGE libraries)'),
):
    series = data[series_key]
    plot(series[:, 0]/1000000, series[:, 1],
         color=series_color, label=series_label)

# Legend, tick and axis-label styling.
legend(fontsize=8)
xticks(fontsize=8)
yticks(fontsize=8)
xlabel("Number of sequenced reads [millions]", fontsize=9)
ylabel("Number of predicted enhancers", fontsize=9)

# Anchor the lower limit of both axes at zero.
xlim(0)
ylim(0)

# Write the same figure as a PNG and as an SVG into the working directory.
for filename in ("figure_enhancer_predictions_randomized.png",
                 "figure_enhancer_predictions_randomized.svg"):
    print("Saving figure as", filename)
    savefig(filename)
